other domains:
data_depression <- read_csv("../data/depression - Fried 2017/MatrixB.csv") #Load dat for estimating Jaccard index (no difference between specific and compound symptoms)
data_anxiety <- read_csv("../data/anxiety - Wall & Lee 2022/anxiety_wall_lee_jaccard.csv")
data_psychosis <- read_csv("../data/psychosis - Bernardin et al. 2023/psychosis - Bernardin et al. 2023.csv") |>
janitor::clean_names() %>%
mutate(across(everything(), ~ ifelse(. == 2, 1, .)))
data_hypomania <- read_csv("../data/hypomania - Chrobak et al. 2018/hypomania.csv")
data_eatingdisorders <- read_csv("../data/eating disorders - Christensen Pacella et al. 2024/MatrixB_EDMeasures_5-26-2023.csv")
data_youth_ocd <- read_csv("../data/ocd - Visiontay et al. 2019/youth_ocd.csv") |>
janitor::clean_names() %>%
mutate(across(everything(), ~ ifelse(. == 2, 1, .)))
# youth depression
# youth anxietyReal, R., & Vargas, J. M. (1996). The probabilistic basis of Jaccard’s index of similarity. Systematic Biology, 45(3), 380-385. DOI:10.1093/sysbio/45.3.380
cor_estimates <- function(.data){
data_mat_t <- .data |>
as.matrix() |>
t()
jaccard_diff <- proxy::dist(data_mat_t, method = "Jaccard") |> as.matrix()
jaccard_sim <- 1 - jaccard_diff
jaccard_sim[!lower.tri(jaccard_sim)] <- NA
mean_jaccard <- mean(jaccard_sim, na.rm = TRUE)
se_jaccard <- plotrix::std.error(as.vector(jaccard_sim), na.rm = TRUE)
return(data.frame(J_mean = mean_jaccard,
J_se = se_jaccard,
J_ci_lower = mean_jaccard - se_jaccard*1.96,
J_ci_upper = mean_jaccard + se_jaccard*1.96))
}
dat <- bind_rows(
data_depression |>
cor_estimates() |>
mutate(domain = "Depression"),
data_anxiety |>
cor_estimates() |>
mutate(domain = "Anxiety"),
data_psychosis |>
cor_estimates() |>
mutate(domain = "Psychosis"),
data_hypomania |>
cor_estimates() |>
mutate(domain = "(Hypo)mania"),
data_youth_ocd |>
cor_estimates() |>
mutate(domain = "(Youth) OCD"),
data_eatingdisorders |>
cor_estimates() |>
mutate(domain = "Eating Disorders")
) |>
mutate(logit_J_mean = qlogis(J_mean),
# Calculate SEs on the logit scale using the delta method
# SE_logit_p = SE_p / [p * (1 - p)]
logit_J_se = J_se / (J_mean * (1 - J_mean))) |>
relocate(domain, .before = J_mean)
dat |>
mutate_if(is.numeric, round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J_mean | J_se | J_ci_lower | J_ci_upper | logit_J_mean | logit_J_se |
|---|---|---|---|---|---|---|
| Depression | 0.41 | 0.02 | 0.37 | 0.46 | -0.35 | 0.10 |
| Anxiety | 0.26 | 0.01 | 0.23 | 0.29 | -1.06 | 0.07 |
| Psychosis | 0.19 | 0.01 | 0.17 | 0.22 | -1.42 | 0.08 |
| (Hypo)mania | 0.41 | 0.03 | 0.36 | 0.46 | -0.36 | 0.11 |
| (Youth) OCD | 0.38 | 0.02 | 0.34 | 0.42 | -0.49 | 0.10 |
| Eating Disorders | 0.22 | 0.02 | 0.18 | 0.26 | -1.29 | 0.12 |
# fit meta
fit <- rma(yi = logit_J_mean,
vi = logit_J_se^2,
data = dat,
method = "REML")
# create a forest plot
plot_forest <-
metafor::forest(fit,
transf = plogis,
slab = domain,
xlim = c(-0.8, 1.7),
at = c(0, .2, .4, .6, .8, 1),
efac = 1,
addpred = TRUE,
xlab = "Jaccard Similarity",
header = c("Domain", "J [95% CI]"),
refline = NA)# table of results
results_predictions <-
bind_cols(
fit |>
predict(transf = plogis, level = 50) |>
as_tibble() |>
dplyr::select(meta_J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower_50 = pi.lb, pi_upper_50 = pi.ub),
fit |>
predict(transf = plogis, level = 95) |>
as_tibble() |>
dplyr::select(pi_lower_95 = pi.lb, pi_upper_95 = pi.ub)
) |>
mutate_all(janitor::round_half_up, digits = 2)
results_predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| meta_J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|
| 0.3 | 0.28 | 0.33 | 0.24 | 0.38 | 0.14 | 0.55 |
escalc_jaccard <- function(.data, domain = NA) {
data_mat_t <- .data |>
as.matrix() |>
t()
jaccard_difference <- proxy::dist(data_mat_t, method = "Jaccard") |> as.matrix()
jaccard_similarity <- 1 - jaccard_difference
# Calculate SEs for each Jaccard similarity
var_names <- rownames(data_mat_t)
n_vars <- length(var_names)
# Initialize the SE matrix
se_matrix <- matrix(NA, nrow = n_vars, ncol = n_vars)
rownames(se_matrix) <- var_names
colnames(se_matrix) <- var_names
# Compute SEs
for (i in 1:n_vars) {
for (j in 1:n_vars) {
x <- data_mat_t[i, ]
y <- data_mat_t[j, ]
a <- sum(x == 1 & y == 1)
b <- sum(x == 1 & y == 0)
c <- sum(x == 0 & y == 1)
n <- a + b + c
J <- a / n
SE <- sqrt(J * (1 - J) / n)
se_matrix[i, j] <- SE
}
}
# Dynamic epsilon for zero-and-one adjustment
#epsilon <- 1 / n_vars # Proportional to the number of variables
epsilon <- 0.5 # common default, greater pull towards center and less influence of 0,1 values
# Apply zero-and-one adjustment
adjusted_jaccard_similarity <- (jaccard_similarity * (n_vars - 1) + epsilon) / n_vars
# Logit transform
logit_jaccard_similarity <- qlogis(adjusted_jaccard_similarity)
# Calculate SEs on the logit scale using the delta method
# SE_logit_p = SE_p / [p * (1 - p)]
se_logit_jaccard_similarity <- se_matrix / (adjusted_jaccard_similarity * (1 - adjusted_jaccard_similarity))
# Replace zero SEs with a principled lower bound
min_se <- sqrt(epsilon * (1 - epsilon) / n_vars)
se_logit_jaccard_similarity[se_logit_jaccard_similarity == 0] <- min_se
# Replace infinite values resulting from division by zero
se_logit_jaccard_similarity[!is.finite(se_logit_jaccard_similarity)] <- NA
# Create variable pair names
m <- adjusted_jaccard_similarity
## Get indices and values
indices <- which(lower.tri(m), arr.ind = TRUE)
vec <- m[lower.tri(m)]
## Create variable pair names
variable_pairs <- paste(rownames(m)[indices[, 1]], colnames(m)[indices[, 2]], sep = " - ")
# Create a data frame for effect sizes
dat_es <- tibble(
domain = domain,
J = jaccard_similarity[lower.tri(jaccard_similarity)],
yi = logit_jaccard_similarity[lower.tri(logit_jaccard_similarity)],
sei = se_logit_jaccard_similarity[lower.tri(se_logit_jaccard_similarity)],
scale1 = rownames(m)[indices[, 1]],
scale2 = colnames(m)[indices[, 2]],
pair = variable_pairs
)
return(dat_es)
}
rma_jaccard_three_level <- function(effect_sizes, title = "Scales"){
# fit a three-level meta-analysis model accounting for shared scales
fit <- rma.mv(yi = yi,
V = sei^2,
random = list(~ 1 | scale1, ~ 1 | scale2),
data = effect_sizes,
method = "REML")
# create a forest plot
plot_forest <-
metafor::forest(fit,
transf = plogis,
slab = effect_sizes$pair,
xlim = c(-0.8, 1.7),
at = c(0, .2, .4, .6, .8, 1),
efac = 0.2,
addpred = TRUE,
xlab = "Jaccard Similarity",
header = c(title, "J [95% CI]"),
refline = NA)
# table of results
results_predictions <-
bind_cols(
fit |>
predict(transf = plogis, level = 50) |>
as_tibble() |>
dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower_50 = pi.lb, pi_upper_50 = pi.ub),
fit |>
predict(transf = plogis, level = 95) |>
as_tibble() |>
dplyr::select(pi_lower_95 = pi.lb, pi_upper_95 = pi.ub)
) |>
mutate(domain = title) |>
relocate(domain, .before = J)
return(list(fit = fit,
predictions = results_predictions,
forest = plot_forest))
}es_depression <- data_depression |>
escalc_jaccard(domain = "Depression")
res_depression <- es_depression |>
rma_jaccard_three_level(title = "Depression scales")res_depression$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| Depression scales | 0.44 | 0.41 | 0.46 | 0.38 | 0.5 | 0.29 | 0.61 |
es_anxiety <- data_anxiety |>
escalc_jaccard(domain = "Anxiety")
res_anxiety <- es_anxiety |>
rma_jaccard_three_level(title = "Anxiety scales")res_anxiety$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| Anxiety scales | 0.27 | 0.24 | 0.29 | 0.19 | 0.36 | 0.1 | 0.55 |
es_psychosis <- data_psychosis |>
escalc_jaccard(domain = "Psychosis")
res_psychosis <- es_psychosis |>
rma_jaccard_three_level(title = "Psychosis scales")res_psychosis$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| Psychosis scales | 0.21 | 0.19 | 0.24 | 0.14 | 0.31 | 0.06 | 0.53 |
es_hypomania <- data_hypomania |>
escalc_jaccard(domain = "(Hypo)mania")
res_hypomania <- es_hypomania |>
rma_jaccard_three_level(title = "(Hypo)mania scales")res_hypomania$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| (Hypo)mania scales | 0.42 | 0.39 | 0.44 | 0.35 | 0.48 | 0.25 | 0.61 |
es_youth_ocd <- data_youth_ocd |>
escalc_jaccard(domain = "Youth OCD")
res_youth_ocd <- es_youth_ocd |>
rma_jaccard_three_level(title = "Youth OCD scales")res_youth_ocd$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| Youth OCD scales | 0.39 | 0.37 | 0.41 | 0.37 | 0.41 | 0.33 | 0.45 |
es_eatingdisorders <- data_eatingdisorders |>
escalc_jaccard(domain = "OCD")
res_eatingdisorders <- es_eatingdisorders |>
rma_jaccard_three_level(title = "Eating disorders scales")res_eatingdisorders$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| Eating disorders scales | 0.25 | 0.23 | 0.27 | 0.22 | 0.29 | 0.16 | 0.36 |
es_combined <- bind_rows(
es_depression,
es_anxiety,
es_psychosis,
es_hypomania,
es_eatingdisorders,
es_youth_ocd
)
rma_jaccard_four_level <- function(effect_sizes){
# Fit a four-level meta-analysis model accounting for shared scales
fit <- rma.mv(yi = yi,
V = sei^2,
random = list(~ 1 | domain/scale1, ~ 1 | domain/scale2),
data = effect_sizes,
method = "REML")
# Create a forest plot
plot_forest <-
meta::forest(fit,
transf = plogis,
slab = effect_sizes$pair,
xlim = c(-0.8, 1.7),
at = c(0, .2, .4, .6, .8, 1),
#cex = 0.4,
efac = 0.2,
addpred = TRUE,
xlab = "Jaccard Similarity",
header = c("Scales", "J [95% CI]"),
refline = NA)
results_predictions <-
bind_cols(
fit |>
predict(transf = plogis, level = 50) |>
as_tibble() |>
dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower_50 = pi.lb, pi_upper_50 = pi.ub),
fit |>
predict(transf = plogis, level = 95) |>
as_tibble() |>
dplyr::select(pi_lower_95 = pi.lb, pi_upper_95 = pi.ub)
) |>
mutate(domain = "4-level RE") |>
relocate(domain, .before = J)
return(list(fit = fit,
predictions = results_predictions,
forest = plot_forest))
}
res_combined <- rma_jaccard_four_level(es_combined)res_combined$predictions |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| 4-level RE | 0.31 | 0.29 | 0.34 | 0.22 | 0.42 | 0.1 | 0.64 |
#res_combined$fit
# Combine estimates and labels into a data frame for easy viewing
tibble(label = c("domain", "domain/scale1", "domain again", "domain/scale2")) |>
mutate(logit_sigma = res_combined$fit$sigma2,
sigma = round_half_up(plogis(logit_sigma), 2)) |>
filter(label != "domain again") |>
select(-logit_sigma) |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| label | sigma |
|---|---|
| domain | 0.52 |
| domain/scale1 | 0.54 |
| domain/scale2 | 0.53 |
3- and 4-level meta
predictions_combined_by_domain <- bind_rows(
res_depression$predictions,
res_anxiety$predictions,
res_psychosis$predictions,
res_hypomania$predictions,
res_youth_ocd$predictions,
res_eatingdisorders$predictions,
res_combined$predictions
) |>
mutate(domain = fct_rev(fct_relevel(domain,
"Depression scales",
"Anxiety scales",
"Psychosis scales",
"(Hypo)mania scales",
"Youth OCD scales",
"Eating disorders scales",
"4-level RE")))
predictions_combined_by_domain |>
mutate_if(is.numeric, janitor::round_half_up, digits = 2) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower_50 | pi_upper_50 | pi_lower_95 | pi_upper_95 |
|---|---|---|---|---|---|---|---|
| Depression scales | 0.44 | 0.41 | 0.46 | 0.38 | 0.50 | 0.29 | 0.61 |
| Anxiety scales | 0.27 | 0.24 | 0.29 | 0.19 | 0.36 | 0.10 | 0.55 |
| Psychosis scales | 0.21 | 0.19 | 0.24 | 0.14 | 0.31 | 0.06 | 0.53 |
| (Hypo)mania scales | 0.42 | 0.39 | 0.44 | 0.35 | 0.48 | 0.25 | 0.61 |
| Youth OCD scales | 0.39 | 0.37 | 0.41 | 0.37 | 0.41 | 0.33 | 0.45 |
| Eating disorders scales | 0.25 | 0.23 | 0.27 | 0.22 | 0.29 | 0.16 | 0.36 |
| 4-level RE | 0.31 | 0.29 | 0.34 | 0.22 | 0.42 | 0.10 | 0.64 |
ggplot(predictions_combined_by_domain, aes(J, domain)) +
# geom_errorbarh(aes(xmin = pi_lower, xmax = pi_upper), linetype = "dotted", height = 0.26) +
# geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", height = 0.26, size = 0.85) +
geom_linerange(aes(xmin = pi_lower_95, xmax = pi_upper_95), linetype = "solid", linewidth = 0.3) +
geom_linerange(aes(xmin = pi_lower_50, xmax = pi_upper_50), linetype = "solid", linewidth = 0.6, color = "red") +
geom_linerange(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", linewidth = 1.2) +
geom_point(size = 2.4, color = "red") +
theme_linedraw() +
scale_x_continuous(limits = c(0,1), breaks = scales::breaks_pretty(n = 10), name = "Jaccard Similarity") +
ylab("")## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.4.0 knitr_1.50 scales_1.4.0
## [4] ggstance_0.3.7 ggplot2_4.0.1 metafor_4.8-0
## [7] numDeriv_2016.8-1.1 metadat_1.4-0 Matrix_1.7-3
## [10] janitor_2.2.1 proxy_0.4-29 forcats_1.0.1
## [13] tibble_3.3.0 readr_2.1.5 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.54 bslib_0.9.0 CompQuadForm_1.4.4
## [5] lattice_0.22-6 mathjaxr_1.8-0 tzdb_0.5.0 Rdpack_2.6.4
## [9] vctrs_0.6.5 tools_4.5.0 generics_0.1.4 parallel_4.5.0
## [13] pkgconfig_2.0.3 RColorBrewer_1.1-3 S7_0.2.1 lifecycle_1.0.4
## [17] compiler_4.5.0 farver_2.1.2 stringr_1.6.0 textshaping_1.0.3
## [21] snakecase_0.11.1 htmltools_0.5.9 sass_0.4.10 yaml_2.3.12
## [25] nloptr_2.2.1 pillar_1.11.1 crayon_1.5.3 jquerylib_0.1.4
## [29] MASS_7.3-65 cachem_1.1.0 meta_8.2-1 reformulas_0.4.1
## [33] boot_1.3-31 nlme_3.1-168 tidyselect_1.2.1 digest_0.6.39
## [37] stringi_1.8.7 purrr_1.2.0 splines_4.5.0 fastmap_1.2.0
## [41] grid_4.5.0 archive_1.1.12.1 cli_3.6.5 magrittr_2.0.4
## [45] withr_3.0.2 plotrix_3.8-4 bit64_4.6.0-1 lubridate_1.9.4
## [49] timechange_0.3.0 rmarkdown_2.30 bit_4.6.0 lme4_1.1-37
## [53] ragg_1.4.0 hms_1.1.3 evaluate_1.0.5 rbibutils_2.3
## [57] viridisLite_0.4.2 rlang_1.1.6 Rcpp_1.1.0 glue_1.8.0
## [61] xml2_1.4.0 minqa_1.2.8 svglite_2.2.1 rstudioapi_0.17.1
## [65] vroom_1.6.6 jsonlite_2.0.0 R6_2.6.1 systemfonts_1.2.3